### Ensemble Averaging and Merging

#### Configuration #####
library(dplyr)
library(randomForestSRC)
library(xgboost)
library(MLmetrics)
library(caret)

# Strongly prefer fixed notation over scientific notation when printing numbers.
options(scipen = 999)
# All CSV paths below are relative to this machine-specific project folder;
# adjust it when running the script elsewhere.
setwd("E:/Fortis/Workspace/Views Competition/Update September 2020")

# NOTE: a bare `MSE()` call used to sit here. MLmetrics::MSE() has no default
# arguments, so that call raised an error and stopped the script whenever it
# was sourced or run with Rscript. The actual MSE checks are in the Task 3
# sections further down.



## The R Version of TADDA
##
## Mean absolute distance between `x` and `y`, augmented with a direction
## penalty of abs(y) for every pair that differs by more than `epsilon` while
## not pointing in the same direction.
##
## Arguments:
##   x        numeric vector; every call in this script passes the observed
##            values here.
##   y        numeric vector of the same length as `x` (or of length 1); every
##            call in this script passes a model's predictions here. The
##            penalty is taken from this argument, so the score is not
##            symmetric in `x` and `y`.
##   epsilon  tolerance; pairs with abs(x - y) <= epsilon are never penalised.
##   na.rm    if TRUE, pairs containing a missing value are dropped from the
##            mean. The default (FALSE) keeps the previous behaviour, where a
##            single NA makes the whole score NA.
##
## Value: a single number, the mean of the (penalised) absolute distances.
##
## Direction rule: a pair counts as a mismatch only when
## sign(x) + sign(y) == 0, i.e. the signs are opposite (or both values are
## zero, which can never exceed a non-negative epsilon). A zero paired with a
## non-zero value is NOT treated as a mismatch.
tadda <- function(x, y, epsilon, na.rm = FALSE) {
  n.x <- length(x)
  n.y <- length(y)

  # `df$misspelled_column` is NULL; without this check it would recycle
  # silently and the score would come back as NaN instead of failing loudly.
  lengths.ok <- n.x == n.y ||
    (n.x == 1L && n.y > 0L) ||
    (n.y == 1L && n.x > 0L)
  if (!lengths.ok) {
    stop(
      "`x` and `y` must have the same length (or one of them length 1); got ",
      n.x, " and ", n.y, ".",
      call. = FALSE
    )
  }

  abs.dist <- abs(x - y)

  # The sum of the two signs is 0 only for opposite signs or for two zeros.
  mismatch <- sign(x) + sign(y) == 0
  penalised <- abs.dist > epsilon & mismatch

  mean(abs.dist + abs(y * penalised), na.rm = na.rm)
}




## test with s2
## Ad-hoc check of tadda() on the Task 2, s = 2 evaluation table. That table is
## only created further down (section "Task 2"), so on a fresh top-to-bottom
## run this section used to stop with "object 'Ensemble.Eval.T2.s2' not found".
## It now runs only when the table is already in the workspace. Results are
## printed explicitly because values are not auto-printed inside `if`.
if (exists("Ensemble.Eval.T2.s2")) {
  print(tadda(Ensemble.Eval.T2.s2$real, Ensemble.Eval.T2.s2$RF.GED.t2.s2, epsilon = 0.048))
  print(tadda(Ensemble.Eval.T2.s2$real, Ensemble.Eval.T2.s2$XGB.GED.t2.s2, epsilon = 0.048))
  print(tadda(Ensemble.Eval.T2.s2$real, Ensemble.Eval.T2.s2$RF.United.t2.s2, epsilon = 0.048))
  print(tadda(Ensemble.Eval.T2.s2$real, Ensemble.Eval.T2.s2$XGB.United.t2.s2, epsilon = 0.048))
  print(tadda(Ensemble.Eval.T2.s2$real, Ensemble.Eval.T2.s2$RF.GED.t2.s2.africa, epsilon = 0.048))
  print(tadda(Ensemble.Eval.T2.s2$real, Ensemble.Eval.T2.s2$XGB.GED.t2.s2.africa, epsilon = 0.048))

  print(tadda(Ensemble.Eval.T2.s2$real, Ensemble.Eval.T2.s2$unw.ensemble, epsilon = 0.048))
}




## test section
## Rebuilds the intermediate terms of tadda() by hand for one model
## (RF.GED.t2.s2) and collects them in `tadda.test.001` for inspection.
## The evaluation table is only created further down (section "Task 2"), so
## this section is skipped when it is not in the workspace yet; previously a
## fresh top-to-bottom run stopped here with an "object not found" error.
if (exists("Ensemble.Eval.T2.s2")) {
  tad.1 <- abs(Ensemble.Eval.T2.s2$real - Ensemble.Eval.T2.s2$RF.GED.t2.s2)
  epsilon <- 0.048

  # 1 where the absolute distance exceeds the tolerance, 0 otherwise.
  print(ifelse(tad.1 > epsilon, 1, 0))

  # 0 when the signs sum to a non-zero value, 1 otherwise.
  penalty.l1 <- ifelse(
    sign(Ensemble.Eval.T2.s2$real) + sign(Ensemble.Eval.T2.s2$RF.GED.t2.s2) >= 1 |
      sign(Ensemble.Eval.T2.s2$real) + sign(Ensemble.Eval.T2.s2$RF.GED.t2.s2) <= -1,
    0,
    1
  )
  # The direction penalty only applies outside the tolerance band.
  penalty.l2 <- ifelse(tad.1 > epsilon, penalty.l1, 0)

  tadda.test.001 <- data.frame(
    Ensemble.Eval.T2.s2$real,
    Ensemble.Eval.T2.s2$RF.GED.t2.s2,
    penalty.l1,
    penalty.l2,
    Ensemble.Eval.T2.s2$RF.GED.t2.s2 * penalty.l2
  )
}





## Task 2

## s = 2
# Load the s = 2 ensemble predictions and reorder the columns by position.
Pred.Ensemble.Task2.s2.africa <- read.csv("Pred_Ensemble_Task2_s2_africa.csv", header = TRUE)
Ensemble.Eval.T2.s2 <- Pred.Ensemble.Task2.s2.africa[, c(5:7, 1:4, 9:11)]

# Save the reordered evaluation table.
write.csv(Ensemble.Eval.T2.s2, file = "Ensemble_Eval_t2_s2.csv", row.names = FALSE)

# TADDA of each model's predictions against the `real` column.
tadda(Ensemble.Eval.T2.s2$real, Ensemble.Eval.T2.s2$RF.GED.t2.s2, epsilon = 0.048)
tadda(Ensemble.Eval.T2.s2$real, Ensemble.Eval.T2.s2$XGB.GED.t2.s2, epsilon = 0.048)
tadda(Ensemble.Eval.T2.s2$real, Ensemble.Eval.T2.s2$RF.United.t2.s2, epsilon = 0.048)
tadda(Ensemble.Eval.T2.s2$real, Ensemble.Eval.T2.s2$XGB.United.t2.s2, epsilon = 0.048)
tadda(Ensemble.Eval.T2.s2$real, Ensemble.Eval.T2.s2$RF.GED.t2.s2.africa, epsilon = 0.048)
tadda(Ensemble.Eval.T2.s2$real, Ensemble.Eval.T2.s2$XGB.GED.t2.s2.africa, epsilon = 0.048)

# Same score for the `unw.ensemble` column.
tadda(Ensemble.Eval.T2.s2$real, Ensemble.Eval.T2.s2$unw.ensemble, epsilon = 0.048)





## s = 3
# Load the s = 3 ensemble predictions and reorder the columns by position.
Pred.Ensemble.Task2.s3.africa <- read.csv("Pred_Ensemble_Task2_s3_africa.csv", header = TRUE)
Ensemble.Eval.T2.s3 <- Pred.Ensemble.Task2.s3.africa[, c(5:7, 1:4, 9:11)]

# TADDA of each model's predictions against the `real` column.
# NOTE(review): `real` is not assigned in this section, so it is presumably
# already a column of the CSV -- confirm.
tadda(Ensemble.Eval.T2.s3$real, Ensemble.Eval.T2.s3$RF.GED.t2.s3, epsilon = 0.048)
tadda(Ensemble.Eval.T2.s3$real, Ensemble.Eval.T2.s3$XGB.GED.t2.s3, epsilon = 0.048)
tadda(Ensemble.Eval.T2.s3$real, Ensemble.Eval.T2.s3$RF.United.t2.s3, epsilon = 0.048)
tadda(Ensemble.Eval.T2.s3$real, Ensemble.Eval.T2.s3$XGB.United.t2.s3, epsilon = 0.048)
tadda(Ensemble.Eval.T2.s3$real, Ensemble.Eval.T2.s3$RF.GED.t2.s3.africa, epsilon = 0.048)
tadda(Ensemble.Eval.T2.s3$real, Ensemble.Eval.T2.s3$XGB.GED.t2.s3.africa, epsilon = 0.048)


## s = 4
# Load the s = 4 ensemble predictions and reorder the columns by position.
Pred.Ensemble.Task2.s4.africa <- read.csv("Pred_Ensemble_Task2_s4_africa.csv", header = TRUE)
Ensemble.Eval.T2.s4 <- Pred.Ensemble.Task2.s4.africa[, c(5:7, 1:4, 9:11)]

# TADDA of each model's predictions against the `real` column.
# NOTE(review): `real` is not assigned in this section, so it is presumably
# already a column of the CSV -- confirm.
tadda(Ensemble.Eval.T2.s4$real, Ensemble.Eval.T2.s4$RF.GED.t2.s4, epsilon = 0.048)
tadda(Ensemble.Eval.T2.s4$real, Ensemble.Eval.T2.s4$XGB.GED.t2.s4, epsilon = 0.048)
tadda(Ensemble.Eval.T2.s4$real, Ensemble.Eval.T2.s4$RF.United.t2.s4, epsilon = 0.048)
tadda(Ensemble.Eval.T2.s4$real, Ensemble.Eval.T2.s4$XGB.United.t2.s4, epsilon = 0.048)
tadda(Ensemble.Eval.T2.s4$real, Ensemble.Eval.T2.s4$RF.GED.t2.s4.africa, epsilon = 0.048)
tadda(Ensemble.Eval.T2.s4$real, Ensemble.Eval.T2.s4$XGB.GED.t2.s4.africa, epsilon = 0.048)


## s = 5
# Load the s = 5 ensemble predictions and reorder the columns by position.
Pred.Ensemble.Task2.s5.africa <- read.csv("Pred_Ensemble_Task2_s5_africa.csv", header = TRUE)
Ensemble.Eval.T2.s5 <- Pred.Ensemble.Task2.s5.africa[, c(5:7, 1:4, 9:11)]
# Copy the step-specific outcome column to the common name `real`.
Ensemble.Eval.T2.s5$real <- Ensemble.Eval.T2.s5$ln_ged_best_sb_s5

# TADDA of each model's predictions against the `real` column.
tadda(Ensemble.Eval.T2.s5$real, Ensemble.Eval.T2.s5$RF.GED.t2.s5, epsilon = 0.048)
tadda(Ensemble.Eval.T2.s5$real, Ensemble.Eval.T2.s5$XGB.GED.t2.s5, epsilon = 0.048)
tadda(Ensemble.Eval.T2.s5$real, Ensemble.Eval.T2.s5$RF.United.t2.s5, epsilon = 0.048)
tadda(Ensemble.Eval.T2.s5$real, Ensemble.Eval.T2.s5$XGB.United.t2.s5, epsilon = 0.048)
tadda(Ensemble.Eval.T2.s5$real, Ensemble.Eval.T2.s5$RF.GED.t2.s5.africa, epsilon = 0.048)
tadda(Ensemble.Eval.T2.s5$real, Ensemble.Eval.T2.s5$XGB.GED.t2.s5.africa, epsilon = 0.048)

## s = 6
# Load the s = 6 ensemble predictions and reorder the columns by position.
Pred.Ensemble.Task2.s6.africa <- read.csv("Pred_Ensemble_Task2_s6_africa.csv", header = TRUE)
Ensemble.Eval.T2.s6 <- Pred.Ensemble.Task2.s6.africa[, c(5:7, 1:4, 9:11)]
# Copy the step-specific outcome column to the common name `real`.
Ensemble.Eval.T2.s6$real <- Ensemble.Eval.T2.s6$ln_ged_best_sb_s6

# TADDA of each model's predictions against the `real` column.
tadda(Ensemble.Eval.T2.s6$real, Ensemble.Eval.T2.s6$RF.GED.t2.s6, epsilon = 0.048)
tadda(Ensemble.Eval.T2.s6$real, Ensemble.Eval.T2.s6$XGB.GED.t2.s6, epsilon = 0.048)
tadda(Ensemble.Eval.T2.s6$real, Ensemble.Eval.T2.s6$RF.United.t2.s6, epsilon = 0.048)
tadda(Ensemble.Eval.T2.s6$real, Ensemble.Eval.T2.s6$XGB.United.t2.s6, epsilon = 0.048)
tadda(Ensemble.Eval.T2.s6$real, Ensemble.Eval.T2.s6$RF.GED.t2.s6.africa, epsilon = 0.048)
tadda(Ensemble.Eval.T2.s6$real, Ensemble.Eval.T2.s6$XGB.GED.t2.s6.africa, epsilon = 0.048)



## s = 7
# Load the s = 7 ensemble predictions and reorder the columns by position
# (this file uses a different column selection than s = 2..6).
Pred.Ensemble.Task2.s7.africa <- read.csv("Pred_Ensemble_Task2_s7_africa.csv", header = TRUE)
Ensemble.Eval.T2.s7 <- Pred.Ensemble.Task2.s7.africa[, c(3:5, 1:2, 6:7, 10:11)]
# Take the outcome from the full prediction table and store it as `real`.
Ensemble.Eval.T2.s7$real <- Pred.Ensemble.Task2.s7.africa$ln_ged_best_sb_s7

# TADDA of each model's predictions against the `real` column.
tadda(Ensemble.Eval.T2.s7$real, Ensemble.Eval.T2.s7$RF.GED.t2.s7, epsilon = 0.048)
tadda(Ensemble.Eval.T2.s7$real, Ensemble.Eval.T2.s7$XGB.GED.t2.s7, epsilon = 0.048)
tadda(Ensemble.Eval.T2.s7$real, Ensemble.Eval.T2.s7$RF.United.t2.s7, epsilon = 0.048)
tadda(Ensemble.Eval.T2.s7$real, Ensemble.Eval.T2.s7$XGB.United.t2.s7, epsilon = 0.048)
tadda(Ensemble.Eval.T2.s7$real, Ensemble.Eval.T2.s7$RF.GED.t2.s7.africa, epsilon = 0.048)
tadda(Ensemble.Eval.T2.s7$real, Ensemble.Eval.T2.s7$XGB.GED.t2.s7.africa, epsilon = 0.048)






#### TASK 3

# s = 2
# Load the s = 2 ensemble predictions and reorder the columns by position.
Pred.Ensemble.Task3.s2.africa <- read.csv("Pred_Ensemble_Task3_s2_africa.csv", header = TRUE)
Ensemble.Eval.T3.s2 <- Pred.Ensemble.Task3.s2.africa[, c(3:5, 1:2, 6:7, 9:10)]

## once per task: observed data, restricted to month_id 100..499
views.ged <- read.csv("views_ged.csv", header = TRUE)
views.ged.pred.t2 <- filter(views.ged, month_id >= 100 & month_id <= 499)

### Outcome column for this step, selected by name. This used to be the
### hand-adjusted positional index 57, which breaks silently if the column
### order of views_ged.csv ever changes.
merge.true.t2.s2 <- views.ged.pred.t2[, c("month_id", "country_id", "ln_ged_best_sb_s2")]

### Shift month_id forward by the step (s = 2 --> +2), presumably so that it
### lines up with the month_id of the predictions in the join below.
merge.true.t2.s2$month_id <- merge.true.t2.s2$month_id + 2
merge.true.t2.s2$real <- merge.true.t2.s2$ln_ged_best_sb_s2

# Attach the outcome to the predictions; unmatched rows get NA in `real`.
Ensemble.Eval.T3.s2 <- left_join(Ensemble.Eval.T3.s2, merge.true.t2.s2, by = c("month_id", "country_id"))

### MSE check
MSE(Ensemble.Eval.T3.s2$real, Ensemble.Eval.T3.s2$RF.GED.t3.s2)
## correct


## Taddas

tadda(Ensemble.Eval.T3.s2$real, Ensemble.Eval.T3.s2$RF.GED.t3.s2, epsilon = 0.048)
tadda(Ensemble.Eval.T3.s2$real, Ensemble.Eval.T3.s2$XGB.GED.t3.s2, epsilon = 0.048)
tadda(Ensemble.Eval.T3.s2$real, Ensemble.Eval.T3.s2$RF.United.t3.s2, epsilon = 0.048)
tadda(Ensemble.Eval.T3.s2$real, Ensemble.Eval.T3.s2$XGB.United.t3.s2, epsilon = 0.048)
tadda(Ensemble.Eval.T3.s2$real, Ensemble.Eval.T3.s2$RF.GED.t3.s2.africa, epsilon = 0.048)
tadda(Ensemble.Eval.T3.s2$real, Ensemble.Eval.T3.s2$XGB.GED.t3.s2.africa, epsilon = 0.048)


#+#


# s = 3
# Load the s = 3 ensemble predictions and reorder the columns by position.
Pred.Ensemble.Task3.s3.africa <- read.csv("Pred_Ensemble_Task3_s3_africa.csv", header = TRUE)
Ensemble.Eval.T3.s3 <- Pred.Ensemble.Task3.s3.africa[, c(3:5, 1:2, 6:7, 9:10)]

## once per task: the observed data is only read from disk when no earlier
## section has loaded it yet (it used to be re-read for every step)
if (!exists("views.ged")) {
  views.ged <- read.csv("views_ged.csv", header = TRUE)
}
views.ged.pred.t2 <- filter(views.ged, month_id >= 100 & month_id <= 499)

### Outcome column for this step, selected by name. This used to be the
### hand-adjusted positional index 58, which breaks silently if the column
### order of views_ged.csv ever changes.
merge.true.t2.s3 <- views.ged.pred.t2[, c("month_id", "country_id", "ln_ged_best_sb_s3")]

### Shift month_id forward by the step (s = 3 --> +3), presumably so that it
### lines up with the month_id of the predictions in the join below.
merge.true.t2.s3$month_id <- merge.true.t2.s3$month_id + 3
merge.true.t2.s3$real <- merge.true.t2.s3$ln_ged_best_sb_s3

# Attach the outcome to the predictions; unmatched rows get NA in `real`.
Ensemble.Eval.T3.s3 <- left_join(Ensemble.Eval.T3.s3, merge.true.t2.s3, by = c("month_id", "country_id"))

### MSE check
MSE(Ensemble.Eval.T3.s3$real, Ensemble.Eval.T3.s3$RF.GED.t3.s3)
## correct


## Taddas

tadda(Ensemble.Eval.T3.s3$real, Ensemble.Eval.T3.s3$RF.GED.t3.s3, epsilon = 0.048)
tadda(Ensemble.Eval.T3.s3$real, Ensemble.Eval.T3.s3$XGB.GED.t3.s3, epsilon = 0.048)
tadda(Ensemble.Eval.T3.s3$real, Ensemble.Eval.T3.s3$RF.United.t3.s3, epsilon = 0.048)
tadda(Ensemble.Eval.T3.s3$real, Ensemble.Eval.T3.s3$XGB.United.t3.s3, epsilon = 0.048)
tadda(Ensemble.Eval.T3.s3$real, Ensemble.Eval.T3.s3$RF.GED.t3.s3.africa, epsilon = 0.048)
tadda(Ensemble.Eval.T3.s3$real, Ensemble.Eval.T3.s3$XGB.GED.t3.s3.africa, epsilon = 0.048)


#+#



# s = 4
# Load the s = 4 ensemble predictions and reorder the columns by position.
Pred.Ensemble.Task3.s4.africa <- read.csv("Pred_Ensemble_Task3_s4_africa.csv", header = TRUE)
Ensemble.Eval.T3.s4 <- Pred.Ensemble.Task3.s4.africa[, c(3:5, 1:2, 6:7, 9:10)]

## once per task: the observed data is only read from disk when no earlier
## section has loaded it yet (it used to be re-read for every step)
if (!exists("views.ged")) {
  views.ged <- read.csv("views_ged.csv", header = TRUE)
}
views.ged.pred.t2 <- filter(views.ged, month_id >= 100 & month_id <= 499)

### Outcome column for this step, selected by name. This used to be the
### hand-adjusted positional index 59, which breaks silently if the column
### order of views_ged.csv ever changes.
merge.true.t2.s4 <- views.ged.pred.t2[, c("month_id", "country_id", "ln_ged_best_sb_s4")]

### Shift month_id forward by the step (s = 4 --> +4), presumably so that it
### lines up with the month_id of the predictions in the join below.
merge.true.t2.s4$month_id <- merge.true.t2.s4$month_id + 4
merge.true.t2.s4$real <- merge.true.t2.s4$ln_ged_best_sb_s4

# Attach the outcome to the predictions; unmatched rows get NA in `real`.
Ensemble.Eval.T3.s4 <- left_join(Ensemble.Eval.T3.s4, merge.true.t2.s4, by = c("month_id", "country_id"))

### MSE check
MSE(Ensemble.Eval.T3.s4$real, Ensemble.Eval.T3.s4$RF.GED.t3.s4)
## correct


## Taddas

tadda(Ensemble.Eval.T3.s4$real, Ensemble.Eval.T3.s4$RF.GED.t3.s4, epsilon = 0.048)
tadda(Ensemble.Eval.T3.s4$real, Ensemble.Eval.T3.s4$XGB.GED.t3.s4, epsilon = 0.048)
tadda(Ensemble.Eval.T3.s4$real, Ensemble.Eval.T3.s4$RF.United.t3.s4, epsilon = 0.048)
tadda(Ensemble.Eval.T3.s4$real, Ensemble.Eval.T3.s4$XGB.United.t3.s4, epsilon = 0.048)
tadda(Ensemble.Eval.T3.s4$real, Ensemble.Eval.T3.s4$RF.GED.t3.s4.africa, epsilon = 0.048)
tadda(Ensemble.Eval.T3.s4$real, Ensemble.Eval.T3.s4$XGB.GED.t3.s4.africa, epsilon = 0.048)


#+#



# s = 5
# Load the s = 5 ensemble predictions and reorder the columns by position.
Pred.Ensemble.Task3.s5.africa <- read.csv("Pred_Ensemble_Task3_s5_africa.csv", header = TRUE)
Ensemble.Eval.T3.s5 <- Pred.Ensemble.Task3.s5.africa[, c(3:5, 1:2, 6:7, 9:10)]

## once per task: the observed data is only read from disk when no earlier
## section has loaded it yet (it used to be re-read for every step)
if (!exists("views.ged")) {
  views.ged <- read.csv("views_ged.csv", header = TRUE)
}
views.ged.pred.t2 <- filter(views.ged, month_id >= 100 & month_id <= 499)

### Outcome column for this step, selected by name. This used to be the
### hand-adjusted positional index 60, which breaks silently if the column
### order of views_ged.csv ever changes.
merge.true.t2.s5 <- views.ged.pred.t2[, c("month_id", "country_id", "ln_ged_best_sb_s5")]

### Shift month_id forward by the step (s = 5 --> +5), presumably so that it
### lines up with the month_id of the predictions in the join below.
merge.true.t2.s5$month_id <- merge.true.t2.s5$month_id + 5
merge.true.t2.s5$real <- merge.true.t2.s5$ln_ged_best_sb_s5

# Attach the outcome to the predictions; unmatched rows get NA in `real`.
Ensemble.Eval.T3.s5 <- left_join(Ensemble.Eval.T3.s5, merge.true.t2.s5, by = c("month_id", "country_id"))

### MSE check
MSE(Ensemble.Eval.T3.s5$real, Ensemble.Eval.T3.s5$RF.GED.t3.s5)
## correct


## Taddas

tadda(Ensemble.Eval.T3.s5$real, Ensemble.Eval.T3.s5$RF.GED.t3.s5, epsilon = 0.048)
tadda(Ensemble.Eval.T3.s5$real, Ensemble.Eval.T3.s5$XGB.GED.t3.s5, epsilon = 0.048)
tadda(Ensemble.Eval.T3.s5$real, Ensemble.Eval.T3.s5$RF.United.t3.s5, epsilon = 0.048)
tadda(Ensemble.Eval.T3.s5$real, Ensemble.Eval.T3.s5$XGB.United.t3.s5, epsilon = 0.048)
tadda(Ensemble.Eval.T3.s5$real, Ensemble.Eval.T3.s5$RF.GED.t3.s5.africa, epsilon = 0.048)
tadda(Ensemble.Eval.T3.s5$real, Ensemble.Eval.T3.s5$XGB.GED.t3.s5.africa, epsilon = 0.048)


#+#

# s = 6
# Load the s = 6 ensemble predictions and reorder the columns by position.
Pred.Ensemble.Task3.s6.africa <- read.csv("Pred_Ensemble_Task3_s6_africa.csv", header = TRUE)
Ensemble.Eval.T3.s6 <- Pred.Ensemble.Task3.s6.africa[, c(3:5, 1:2, 6:7, 9:10)]

## once per task: the observed data is only read from disk when no earlier
## section has loaded it yet (it used to be re-read for every step)
if (!exists("views.ged")) {
  views.ged <- read.csv("views_ged.csv", header = TRUE)
}
views.ged.pred.t2 <- filter(views.ged, month_id >= 100 & month_id <= 499)

### Outcome column for this step, selected by name. This used to be the
### hand-adjusted positional index 61, which breaks silently if the column
### order of views_ged.csv ever changes.
merge.true.t2.s6 <- views.ged.pred.t2[, c("month_id", "country_id", "ln_ged_best_sb_s6")]

### Shift month_id forward by the step (s = 6 --> +6), presumably so that it
### lines up with the month_id of the predictions in the join below.
merge.true.t2.s6$month_id <- merge.true.t2.s6$month_id + 6
merge.true.t2.s6$real <- merge.true.t2.s6$ln_ged_best_sb_s6

# Attach the outcome to the predictions; unmatched rows get NA in `real`.
Ensemble.Eval.T3.s6 <- left_join(Ensemble.Eval.T3.s6, merge.true.t2.s6, by = c("month_id", "country_id"))

### MSE check
MSE(Ensemble.Eval.T3.s6$real, Ensemble.Eval.T3.s6$RF.GED.t3.s6)
## correct


## Taddas

tadda(Ensemble.Eval.T3.s6$real, Ensemble.Eval.T3.s6$RF.GED.t3.s6, epsilon = 0.048)
tadda(Ensemble.Eval.T3.s6$real, Ensemble.Eval.T3.s6$XGB.GED.t3.s6, epsilon = 0.048)
tadda(Ensemble.Eval.T3.s6$real, Ensemble.Eval.T3.s6$RF.United.t3.s6, epsilon = 0.048)
tadda(Ensemble.Eval.T3.s6$real, Ensemble.Eval.T3.s6$XGB.United.t3.s6, epsilon = 0.048)
tadda(Ensemble.Eval.T3.s6$real, Ensemble.Eval.T3.s6$RF.GED.t3.s6.africa, epsilon = 0.048)
tadda(Ensemble.Eval.T3.s6$real, Ensemble.Eval.T3.s6$XGB.GED.t3.s6.africa, epsilon = 0.048)


#+#


# s = 7
# Load the s = 7 ensemble predictions and reorder the columns by position.
Pred.Ensemble.Task3.s7.africa <- read.csv("Pred_Ensemble_Task3_s7_africa.csv", header = TRUE)
Ensemble.Eval.T3.s7 <- Pred.Ensemble.Task3.s7.africa[, c(3:5, 1:2, 6:7, 9:10)]

## once per task: the observed data is only read from disk when no earlier
## section has loaded it yet (it used to be re-read for every step)
if (!exists("views.ged")) {
  views.ged <- read.csv("views_ged.csv", header = TRUE)
}
views.ged.pred.t2 <- filter(views.ged, month_id >= 100 & month_id <= 499)

### Outcome column for this step, selected by name. This used to be the
### hand-adjusted positional index 62, which breaks silently if the column
### order of views_ged.csv ever changes.
merge.true.t2.s7 <- views.ged.pred.t2[, c("month_id", "country_id", "ln_ged_best_sb_s7")]

### Shift month_id forward by the step (s = 7 --> +7), presumably so that it
### lines up with the month_id of the predictions in the join below.
merge.true.t2.s7$month_id <- merge.true.t2.s7$month_id + 7
merge.true.t2.s7$real <- merge.true.t2.s7$ln_ged_best_sb_s7

# Attach the outcome to the predictions; unmatched rows get NA in `real`.
Ensemble.Eval.T3.s7 <- left_join(Ensemble.Eval.T3.s7, merge.true.t2.s7, by = c("month_id", "country_id"))

### MSE check
MSE(Ensemble.Eval.T3.s7$real, Ensemble.Eval.T3.s7$RF.GED.t3.s7)
## correct


## Taddas

tadda(Ensemble.Eval.T3.s7$real, Ensemble.Eval.T3.s7$RF.GED.t3.s7, epsilon = 0.048)
tadda(Ensemble.Eval.T3.s7$real, Ensemble.Eval.T3.s7$XGB.GED.t3.s7, epsilon = 0.048)
tadda(Ensemble.Eval.T3.s7$real, Ensemble.Eval.T3.s7$RF.United.t3.s7, epsilon = 0.048)
tadda(Ensemble.Eval.T3.s7$real, Ensemble.Eval.T3.s7$XGB.United.t3.s7, epsilon = 0.048)
tadda(Ensemble.Eval.T3.s7$real, Ensemble.Eval.T3.s7$RF.GED.t3.s7.africa, epsilon = 0.048)
tadda(Ensemble.Eval.T3.s7$real, Ensemble.Eval.T3.s7$XGB.GED.t3.s7.africa, epsilon = 0.048)


#+#